% HYPOTHETICAL EXPERIMENT/SIMULATION
% N subjects are followed up for `time` years and measured every dt years
% p1 the risk of developing disease when exposed
% p0 the risk of developing disease when not exposed
% e = probability of being exposed
% we assume that there is no loss to follow-up

%%%% Paper simulation scenario IV: in scenario IV we assumed that a fixed binary confounder C increased the probability of being
%%%% exposed as well as the risk of disease by ten times, in the absence of a causal association between E and D

clear all

% ---- Simulation parameters ------------------------------------------------
N = 1000;    % number of subjects
e = 0.1;     % baseline probability of being exposed (pe in the article)
RR = 1;      % relative risk associated with the exposure
p0 = 0.01;   % risk of developing the disease when not exposed
p1 = RR*p0;  % risk of developing the disease when exposed
time = 10;   % length of follow-up
dt = 1;      % interval between two measurements
de = 1;      % getting the disease multiplies the probability of exposure by de
cd = 10;     % factor by which the confounder C multiplies the risk of disease
ce = 10;     % factor by which the confounder C multiplies the probability of exposure

time = time+1; % one extra column: time index 1 corresponds to follow-up time 0
age_at_date=zeros(N,time); % biological age at each time index (only allocated here)

% ---- Cohort state array: subject(s,t,layer) -------------------------------
%   layer 1 : risk of developing the disease
%   layer 2 : exposure status (0 = unexposed, 1 = exposed)
%   layer 3 : disease status  (0 = healthy,   1 = diseased)
%   layer 4 : follow-up time
%   layer 5 : probability of getting exposed
%   layer 6 : fixed binary confounder C
subject = zeros(N,time,6);
subject(:,:,1)=p0;
subject(:,:,5)=e;

% Initialize the random number generator to make the results repeatable.
rng(0,'twister');

% Randomly assign the confounder C to about half of the cohort. C must act on
% the *probabilities* (layers 5 and 1), not on the exposure/disease *status*
% (layers 2 and 3): writing e*ce and p0*cd into the status layers would mark
% every C=1 subject as exposed from baseline and store a non-binary disease
% state, corrupting the binary series analysed later on.
x = rand(N,1);
hasC = (x >= 0.5);
subject(hasC,:,6) = 1;      % confounder present
subject(hasC,:,5) = e*ce;   % C multiplies the probability of exposure by ce
subject(hasC,:,1) = p0*cd;  % C multiplies the risk of disease by cd


% ted:  column 1 = time of first exposure, column 2 = time of disease onset.
% ted2: column 1 = biological age at first exposure, column 2 = biological
%       age at disease onset.
% Both default to 0 (no exposure / no disease); they are only allocated in
% this section and are not filled in by the simulation loop below.
ted=zeros(N,2);
ted2=zeros(N,2);

% Simulate the follow-up of every subject. At each time index t one waiting
% time until exposure and one until disease onset are drawn; an event is
% considered to have happened during the interval if its waiting time is <= dt.
for s=1:N  % s is the subject number

  % Subject-specific baseline values. Column 1 is never overwritten by the
  % loop (all updates go to t+1:time), so it always holds the baseline.
  baseRisk = subject(s,1,1);          % baseline risk of disease
  baseExposureRate = subject(s,1,5);  % baseline probability of exposure

  for t=1:dt:time-1
  lambdae=subject(s,t,5); % The probability of getting exposed
  lambdad=subject(s,t,1); % The probability of getting the disease
  wte = -log(rand (1, 1))/lambdae; % Poisson waiting times follow an exponential distribution.
  wtd = -log(rand (1, 1))/lambdad; % Poisson waiting times follow an exponential distribution.

  if (wte<=dt)  % If the time of exposure is within dt then this individual is
      subject(s,t+1:time,2)=1;  % exposed until the end of follow-up
      % The exposure multiplies the subject's OWN baseline risk by RR.
      % Resetting the risk to the global p1 = RR*p0 would discard any
      % subject-specific baseline (e.g. a confounder multiplier) and make the
      % exposure look protective for such subjects.
      subject(s,t+1:time,1)=RR*baseRisk;
  end

  if (wtd<=dt)   % If the time till disease is within dt then this individual has
      subject(s,t+1:time,3)=1; % the disease
      % The disease multiplies the baseline probability of exposure by de,
      % once. Scaling the current value instead would compound to de^k,
      % because the disease draw is repeated at every later time index.
      subject(s,t+1:time,5)=baseExposureRate*de;
  end

  subject(s,t,4)=t-1; % Layer 4 contains the 'follow-up time' of every individual
  end

  % The loop stops at time-1, so fill in the follow-up time of the last column too.
  subject(s,time,4)=time-1;
end

% s = (1:N);

%plot(s,ted(:,1),'-',s, ted(:,2),'--');

%%%%%% Put infodynamics.jar (JIDT) on the dynamic Java class path
javaaddpath('C:\Users\Ahmad Aziz\Desktop\Papers\Causal_inference\infodynamics-dist-1.2.1/infodynamics.jar');

%% Cut a two-dimensional (subject x time) window out of the cohort

tb = 1;        % first time index of the window
tw = time-1;   % the window covers time indices tb .. tb+tw

sourceArray0 = subject(:, tb:tb+tw, 2); % layer 2 (exposure) is the source
destArray0   = subject(:, tb:tb+tw, 3); % layer 3 (disease) is the destination
% tw2=2^(tw+1);

%% Flatten both windows into one long row each.
% After transposing, every column holds one subject's time series, so the
% column-major reshape concatenates the subjects one after another.
sourceArray1 = sourceArray0.';
destArray1   = destArray0.';
sourceArray  = reshape(sourceArray1, 1, []);
destArray    = reshape(destArray1, 1, []);


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Code from Example 5 - Multivariate transfer entropy on binary data 

%teCalc=javaObject('infodynamics.measures.discrete.TransferEntropyCalculatorDiscrete', tw2, 1);
%teCalc.initialise();
% We need to construct the joint values of the dest and source before we pass them in,
% and need to use the matrix conversion routine when calling from Matlab/Octave:
%mUtils=javaObject('infodynamics.utils.MatrixUtils');
% teCalc.addObservations(mUtils.computeCombinedValues(octaveToJavaIntMatrix(sourceArray), 2), ...
%		mUtils.computeCombinedValues(octaveToJavaIntMatrix(destArray), 2));

% teCalc.addObservations(mUtils.computeCombinedValues(sourceArray,2), mUtils.computeCombinedValues(destArray,2));

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Code from Example 1 - Transfer entropy on binary data

% global teCalc

% Transfer entropy from the exposure series (source) to the disease series
% (destination), computed with the JIDT discrete calculator. The constructor
% arguments are the alphabet size (2 = binary data) and the history length
% (tw) -- see the JIDT Javadoc for TransferEntropyCalculatorDiscrete.
% NOTE(review): sourceArray/destArray concatenate all subjects into a single
% row, so with a history length of tw the embedded histories run across
% subject boundaries -- confirm that this is intended.
teCalc=javaObject('infodynamics.measures.discrete.TransferEntropyCalculatorDiscrete', 2, tw);
teCalc.initialise();
% Since we have simple arrays of ints, we can directly pass these in:
teCalc.addObservations(sourceArray, destArray);
% Calculation of the TE:
result = teCalc.computeAverageLocalOfObservations();
fprintf('The transfer entropy is %.4f bits.\n', result);


% Significance of the observed TE against 1000 surrogate measurements.
cs = teCalc.computeSignificance(1000);
% The mean is stored as surrogateMean: a variable called `mean` would shadow
% MATLAB's built-in mean() for everything that runs afterwards.
surrogateMean = cs.getMeanOfDistribution()
sd = cs.getStdOfDistribution()
tscore = cs.getTSscore() %Assuming the distribution is Gaussian, return a t-score for our observed measurement
pvalue = cs.pValue
 % % t2=(result2-surrogateMean)/sd


% %%%% Bootstrap confidence intervals %%%%%%%%%%%%%%%%%% 
% global counter btci_dist
% counter = 0;
% h = @aziz_bootstrapci_bin_transferentropy;
% bci = bootci(100,{h,sourceArray', destArray'}, 'alpha', 0.05, 'type', 'bca')

%dist=cs.distribution;

% Pearson correlation (and its p-value) between the flattened exposure and
% disease series, for comparison with the transfer entropy.
[correlation, p] = corrcoef(sourceArray, destArray)
%rho=corr(sourceArray',destArray')
%Pearson_corr(s)=pr(1,2);

%%% Cross-section of the cohort at the end of the follow-up period, saved so
%%% that a cross-tabulation and the chi-square statistic can be computed
%%% from it afterwards.
ct = time;                         % last time index
exposure = subject(:, ct, 2);      % exposure status at the end of follow-up
disease  = subject(:, ct, 3);      % disease status at the end of follow-up

outputFile = 'aziz_transferentropy_simulation2.mat';
save(outputFile, 'exposure', 'disease');


% hist(dist);
% [counts, bins] = hist(dist);
% plot(bins, counts); %# get a line plot of the histogram

% tev is a vector containing the individual transfer entropies